# Alex Gazmararian
# agazmararian@gmail.com
source("analysis/visibility/analysis_config.R")

g <- readRDS(here("data", "output", "visibility_analysis.rds"))

message("Project visibility (weighted): ", round(weighted.mean(g$greenproj_bin, g$wt_all_trim), 2)) 

message("Sample size by survey wave:")
n_total <- nrow(g)
writeLines(as.character(n_total), paste0("output/pnas/stats/n_total_survey.txt"))

message("Spring 2024 Finance: ", nrow(subset(g, sample == "Spring2024Finance")))
message("Spring 2024 Roosevelt: ", nrow(subset(g, sample == "Spring2024Roosevelt")))
message("Summer 2024 CBAM: ", nrow(subset(g, sample == "Summer2024CBAM")))

message("Survey wave start dates:")
message("Spring 2024 Finance: ", min(subset(g, sample == "Spring2024Finance")$start_date), " to ", max(subset(g, sample == "Spring2024Finance")$start_date))
message("Spring 2024 Roosevelt: ", min(subset(g, sample == "Spring2024Roosevelt")$start_date), " to ", max(subset(g, sample == "Spring2024Roosevelt")$start_date))
message("Summer 2024 CBAM: ", min(subset(g, sample == "Summer2024CBAM")$start_date), " to ", max(subset(g, sample == "Summer2024CBAM")$start_date))

# Compare the sample mean statistics for respondent-level characteristics
g %>%
  group_by(sample) %>%
  mutate(n = n()) %>%
  summarize(
    across(
      c(age, female, black, asian, otherrace, hispanic, college,
        employ_bin, incomeq1, incomeq2, incomeq3, incomeq4, incomeq5, dem, rep, gw_idx, n),
      ~ mean(.x)
    )
  ) %>%
  pivot_longer(!sample) %>%
  pivot_wider(names_from = sample, values_from = value) %>%
  mutate(
    name = case_when(
      name == "age" ~ "Age",
      name == "female" ~ "Female",
      name == "black" ~ "Black",
      name == "asian" ~ "Asian",
      name == "otherrace" ~ "Other race",
      name == "hispanic" ~ "Hispanic/Latino",
      name == "college" ~ "College",
      name == "employ_bin" ~ "Employed",
      name == "incomeq1" ~ "Income Q1",
      name == "incomeq2" ~ "Income Q2",
      name == "incomeq3" ~ "Income Q3",
      name == "incomeq4" ~ "Income Q4",
      name == "incomeq5" ~ "Income Q5",
      name == "dem" ~ "Democrat",
      name == "rep" ~ "Republican",
      name == "gw_idx" ~ "Global Warming Index",
      name == "n" ~ "$N$"
    )
  ) %>%
  tt(
    caption = "Survey sample summaries",
    notes = "\\textit{Notes:} Prefer not to say income answers imputed with median household income. Column header shows month/day of survey wave start and end dates. Global warming index ranges from 0 to 1, where larger values indicate greater concern."
  ) %>%
  setNames(c(" ", "3/14--4/9", "5/13--6/6", "8/6--11/11")) %>%
  format_tt(
    digits = 2,
    num_fmt = "significant_cell"
  ) %>%
  group_tt(
    j = list("2024 Field Date" = 2:4)
  ) %>%
  theme_latex(placement = "H") %>%
  save_tt(here("output", "pnas", "tables", "tab_S2_sample_comparison.tex"), overwrite = TRUE)
  
# Summary for all variables in analysis across all respondents
# Credit variables only asked in Spring2024Finance and Summer2024CBAM samples
g_credit <- g %>% filter(sample %in% c("Spring2024Finance", "Summer2024CBAM"))
n_credit <- nrow(g_credit)

# Helper function to compute summary stats
compute_summary <- function(df, vars) {
  df %>%
    summarize(
      across(
        all_of(vars),
        .fns = list(
          "mean" = ~ mean(.x, na.rm = TRUE),
          "sd" = ~ sd(.x, na.rm = TRUE),
          "min" = ~ min(.x, na.rm = TRUE),
          "max" = ~ max(.x, na.rm = TRUE),
          "na" = ~ sum(is.na(.x))
        ),
        .names = "{.col}.{.fn}"
      )
    ) %>%
    pivot_longer(cols = everything()) %>%
    separate_wider_delim(cols = name, delim = ".", names = c("variable", "statistic")) %>%
    pivot_wider(names_from = statistic, values_from = value)
}

# Variables asked in all samples
main_vars <- c(
  "greenproj_bin",
  "age", "female", "black", "asian", "otherrace", "hispanic", "college",
  "employ_bin", "incomeq1", "incomeq2", "incomeq3", "incomeq4", "incomeq5", "dem", "rep", "gw_idx",
  "urate", "force_ln", "gdp_ln", "income_pc", "highway", "college_acs", "poverty_acs", "housing_acs", "foreign_acs",
  "popd", "bb100_bin", "demshare"
)

# Credit variables only in subset of samples
credit_vars <- c("credit_biden_bin", "credit_state_bin", "credit_cong_bin", "credit_com_bin", "credit_market_bin")

# Compute summaries separately
main_summary <- compute_summary(g, main_vars)
credit_summary <- compute_summary(g_credit, credit_vars)

# Combine with credit variables in the proper position (after greenproj_bin)
bind_rows(
  main_summary %>% filter(variable == "greenproj_bin"),
  credit_summary,
  main_summary %>% filter(variable != "greenproj_bin")
) %>%
  rename(name = variable) %>%
  mutate(
    name = case_when(
      name == "greenproj_bin" ~ "Sees clean energy project",
      name == "credit_biden_bin" ~ "Credits Biden$^\\dagger$",
      name == "credit_gov_bin" ~ "Credits Governor$^\\dagger$",
      name == "credit_cong_bin" ~ "Credits Congress$^\\dagger$",
      name == "credit_state_bin" ~ "Credits State$^\\dagger$",
      name == "credit_com_bin" ~ "Credits Local Officials$^\\dagger$",
      name == "credit_market_bin" ~ "Credits Markets$^\\dagger$",
      name == "age" ~ "Age",
      name == "female" ~ "Female",
      name == "black" ~ "Black",
      name == "asian" ~ "Asian",
      name == "otherrace" ~ "Other race",
      name == "hispanic" ~ "Hispanic/Latino",
      name == "college" ~ "College",
      name == "employ_bin" ~ "Employed",
      name == "incomeq1" ~ "Income Q1",
      name == "incomeq2" ~ "Income Q2",
      name == "incomeq3" ~ "Income Q3",
      name == "incomeq4" ~ "Income Q4",
      name == "incomeq5" ~ "Income Q5",
      name == "dem" ~ "Democrat",
      name == "rep" ~ "Republican",
      name == "gw_idx" ~ "Global warming index",
      name == "urate" ~ "Unemployment rate",
      name == "force_ln" ~ "Labor force (log) ($t-1$)",
      name == "gdp_ln" ~ "County GDP (log) ($t-1$)",
      name == "income_pc" ~ "County income pc ($t-1$)",
      name == "highway" ~ "Highway access",
      name == "college_acs" ~ "County college share ($t-1$)",
      name == "poverty_acs" ~ "County poverty share ($t-1$)",
      name == "housing_acs" ~ "Median county housing costs ($t-1$)",
      name == "foreign_acs" ~ "County foreign-born share ($t-1$)",
      name == "popd" ~ "Population density",
      name == "bb100_bin" ~ "Faster broadband access",
      name == "demshare" ~ "County 2020 Biden vote share",
      T ~ as.character(name)
    )
  ) %>%
  tt(
    caption = "Proximity analysis summary statistics",
    notes = paste(
      "Notes: Summary statistics across all survey samples ($N=", nrow(g), "$).",
      "Analyses standardize continuous county-level measures with the within-state variance.",
      "$^\\dagger$Credit attribution questions asked only in two of three survey waves ($N=", n_credit, "$).",
      sep = " "
    ),
    escape = FALSE
  ) %>%
  format_tt(
    digits = 2,
    num_fmt = "significant_cell"
  ) %>%
  setNames(c(" ", "Mean", "SD", "Min", "Max", "NA")) %>%
  theme_latex(resize_width = 0.63, resize_direction = "both", placement = "H") %>%
  save_tt(here("output", "pnas", "tables", "tab_S5_summary_statistics.tex"), overwrite = TRUE)
